In [1]:
import numpy
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [10, 5]
import pandas
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
import time
from time import gmtime, strftime
from datetime import datetime
In [2]:
GOOG = pandas.read_csv('GOOG_stock_sample-2/GOOG_1min_sample_cordates.csv', sep=";")
GOOG['date'] = pandas.to_datetime(GOOG['date'], format="%d.%m.%Y %H:%M")
GOOG.name = "GOOG"
In [3]:
FDX = pandas.read_csv('FDX_stock_sample/FDX_1min_sample_cordates.csv', sep=";")
FDX['date'] = pandas.to_datetime(FDX['date'], format="%d.%m.%Y %H:%M")
FDX.name = "FDX"
In [4]:
GS = pandas.read_csv('GS_stock_sample/GS_1min_sample_cordates.csv', sep=";")
GS['date'] = pandas.to_datetime(GS['date'], format="%d.%m.%Y %H:%M")
GS.name = "GS"
In [5]:
KO = pandas.read_csv('KO_stock_sample/KO_1min_sample_cordates.csv', sep=";")
KO['date'] = pandas.to_datetime(KO['date'], format="%d.%m.%Y %H:%M")
KO.name = "KO"
In [6]:
initial = {}
initial['GOOG'] = GOOG
initial['FDX'] = FDX
initial['GS'] = GS
initial['KO'] = KO
ik = list(initial.keys())
In [7]:
initial_cut = {}
for t in ik:
    for k in range(4,8) :
            keydate = t + " Stock" + " Apr " + str(k)
            initial_cut[keydate] = initial[t].iloc[int(1960/5*(k-4)):int(1960/5*(k-3))]
ik_cut = list(initial_cut.keys())

OHLC plots

In [8]:
import plotly.graph_objects as go
import plotly.express as px
In [9]:
def ohlcplot(dataframe, stockname):
    fig = go.Figure(data=go.Ohlc(x=dataframe['date'],
                    open=dataframe['open'],
                    high=dataframe['high'],
                    low=dataframe['low'],
                    close=dataframe['close']))
    fig.update(layout_xaxis_rangeslider_visible=False)
    
    #fig.update_layout(shapes = [dict(x0='2022-04-04 16:00:00', x1='2022-04-04 16:00:00', y0=0, y1=1, xref='x', yref='paper',line_width=2, line_color = "#191970", opacity=0.4)])


    fig.update_xaxes(
            rangebreaks=[

                # NOTE: Below values are bound (not single values), ie. hide x to y
                #dict(bounds=["sat", "mon"]),  hide weekends, eg. hide sat to before mon
                dict(bounds=[16, 9.5], pattern="hour"),  # hide hours outside of 9.30am-4pm
                # dict(values=["2019-12-25", "2020-12-24"])  # hide holidays (Christmas and New Year's, etc)
            ]
        )
    fig.update_layout(
            title= 'OHLC plot',
            yaxis_title= stockname + " Stock"
        )
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_yaxes(color="black")
    fig.update_xaxes(color="black")
    fig.update_layout(title_font_color="black")
    
    
    fig.show()
    
In [10]:
for t in ik:
    ohlcplot(initial[t], t)

Pre-work

In [11]:
numpy.random.seed(7)
scaler = MinMaxScaler(feature_range=(0, 1))
In [12]:
initial_close_cut = {}
dataset_close_cut = {}
    
for t in ik_cut:
    initial_close_cut[t] = pandas.DataFrame(initial_cut[t].iloc[0:392, 4])
    dataset_close_cut[t] = scaler.fit_transform(initial_close_cut[t][['close']].astype('float64'))
    
ick_cut = list(initial_close_cut.keys())
dck_cut = list(dataset_close_cut.keys())
In [13]:
import warnings
warnings.filterwarnings("ignore")


import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

LSTM

In [14]:
lstm_1_rmse_train = {}
lstm_1_rmse_test = {}
lstm_1_mape_train = {}
lstm_1_mape_test = {}

lstm_WM_rmse_train = {}
lstm_WM_rmse_test = {}
lstm_WM_mape_train = {}
lstm_WM_mape_test = {}

lstm_TS_rmse_train = {}
lstm_TS_rmse_test = {}
lstm_TS_mape_train = {}
lstm_TS_mape_test = {}

lstm_MBB_rmse_train = {}
lstm_MBB_rmse_test = {}
lstm_MBB_mape_train = {}
lstm_MBB_mape_test = {}

lstm_SMBB_rmse_train = {}
lstm_SMBB_rmse_test = {}
lstm_SMBB_mape_train = {}
lstm_SMBB_mape_test = {}
In [15]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
	dataX, dataY = [], []
	for i in range(len(dataset)-look_back-1):
		a = dataset[i:(i+look_back), 0]
		dataX.append(a)
		dataY.append(dataset[i + look_back, 0])
	return numpy.array(dataX), numpy.array(dataY)



def lstm_reg(dataset, stockname, lookback = 1, plotname = "", isstacked = ""):
    # split into train and test sets
    train_size = int(len(dataset) * 0.6)
    test_size = len(dataset) - train_size
    train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
    #print(dataset)
    #print(scaler.inverse_transform(train))
    #print(scaler.inverse_transform(test))
    #print(scaler.inverse_transform(dataset))
    # reshape into X=t and Y=t+1
    look_back = lookback
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    #print(trainX, trainY)
    #print(testX, testY)
    if plotname == "" or plotname == " with Window Method":
        # reshape input to be [samples, time steps, features]
        trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
        testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
        # create and fit the LSTM network
        model = Sequential()
        model.add(LSTM(4, input_shape=(1, look_back)))
        model.add(Dense(1))
        model.compile(loss='mean_squared_error', optimizer='adam')
        model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=0)
        # make predictions
        #trainPredict = model.predict(trainX)
        #testPredict = model.predict(testX)
        # invert predictions
        trainPredict = scaler.inverse_transform(model.predict(trainX))
        trainY = scaler.inverse_transform([trainY])
        testPredict = scaler.inverse_transform(model.predict(testX))
        testY = scaler.inverse_transform([testY])
        trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
        testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
        trainScore_mape = mean_absolute_percentage_error(trainY[0], trainPredict[:,0])*100
        testScore_mape = mean_absolute_percentage_error(testY[0], testPredict[:,0])*100
        if plotname == "":
            lstm_1_rmse_train[stockname] = trainScore
            lstm_1_rmse_test[stockname] = testScore
            lstm_1_mape_train[stockname] = trainScore_mape
            lstm_1_mape_test[stockname] = testScore_mape
        else: 
            lstm_WM_rmse_train[stockname] = trainScore
            lstm_WM_rmse_test[stockname] = testScore
            lstm_WM_mape_train[stockname] = trainScore_mape
            lstm_WM_mape_test[stockname] = testScore_mape
            
    else:
        # reshape input to be [samples, time steps, features]
        trainX = numpy.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
        testX = numpy.reshape(testX, (testX.shape[0], testX.shape[1], 1))
        if plotname == " with Time Steps":
            # create and fit the LSTM network
            model = Sequential()
            model.add(LSTM(4, input_shape=(look_back, 1)))
            model.add(Dense(1))
            model.compile(loss='mean_squared_error', optimizer='adam')
            model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=0)
            # make predictions
            trainPredict = scaler.inverse_transform(model.predict(trainX))
            trainY = scaler.inverse_transform([trainY])
            testPredict = scaler.inverse_transform(model.predict(testX))
            testY = scaler.inverse_transform([testY])
            trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
            testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
            trainScore_mape = mean_absolute_percentage_error(trainY[0], trainPredict[:,0])*100
            testScore_mape = mean_absolute_percentage_error(testY[0], testPredict[:,0])*100
            lstm_TS_rmse_train[stockname] = trainScore
            lstm_TS_rmse_test[stockname] = testScore
            lstm_TS_mape_train[stockname] = trainScore_mape
            lstm_TS_mape_test[stockname] = testScore_mape
        else: 
            if isstacked == "":
                # create and fit the LSTM network
                batch_size = 1
                model = Sequential()
                model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
                model.add(Dense(1))
                model.compile(loss='mean_squared_error', optimizer='adam')
                for i in range(100):
                    model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
                    model.reset_states()
                # make predictions
                trainPredict = scaler.inverse_transform(model.predict(trainX, batch_size=batch_size))
                trainY = scaler.inverse_transform([trainY])
                model.reset_states()
                testPredict = scaler.inverse_transform(model.predict(testX, batch_size=batch_size))
                testY = scaler.inverse_transform([testY])
                trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
                testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
                trainScore_mape = mean_absolute_percentage_error(trainY[0], trainPredict[:,0])*100
                testScore_mape = mean_absolute_percentage_error(testY[0], testPredict[:,0])*100
                lstm_MBB_rmse_train[stockname] = trainScore
                lstm_MBB_rmse_test[stockname] = testScore
                lstm_MBB_mape_train[stockname] = trainScore_mape
                lstm_MBB_mape_test[stockname] = testScore_mape
            else:
                # create and fit the LSTM network
                batch_size = 1
                model = Sequential()
                model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True, return_sequences=True))
                model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
                model.add(Dense(1))
                model.compile(loss='mean_squared_error', optimizer='adam')
                for i in range(100):
                    model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
                    model.reset_states()
                # make predictions
                trainPredict = scaler.inverse_transform(model.predict(trainX, batch_size=batch_size))
                trainY = scaler.inverse_transform([trainY])
                model.reset_states()
                testPredict = scaler.inverse_transform(model.predict(testX, batch_size=batch_size))
                testY = scaler.inverse_transform([testY])
                trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
                testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
                trainScore_mape = mean_absolute_percentage_error(trainY[0], trainPredict[:,0])*100
                testScore_mape = mean_absolute_percentage_error(testY[0], testPredict[:,0])*100
                lstm_SMBB_rmse_train[stockname] = trainScore
                lstm_SMBB_rmse_test[stockname] = testScore
                lstm_SMBB_mape_train[stockname] = trainScore_mape
                lstm_SMBB_mape_test[stockname] = testScore_mape
    # calculate root mean squared error   
    
    print('Train Score: %.3f RMSE' % (trainScore))
    print('Test Score: %.3f RMSE' % (testScore))
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    
    # shift train predictions for plotting
    trainPredictPlot = numpy.empty_like(dataset)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
    # shift test predictions for plotting
    testPredictPlot = numpy.empty_like(dataset)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
    # plot baseline and predictions
    #plt.plot(scaler.inverse_transform(dataset), color = "black")
    #plt.plot(trainPredictPlot)
    #plt.plot(testPredictPlot)
    #plt.show()

    fig = px.line(scaler.inverse_transform(dataset), color_discrete_sequence=["black"])
    fig.add_traces(
        list(px.line(trainPredictPlot, color_discrete_sequence=["#409a73"]).select_traces())
    )
    fig.add_traces(
        list(px.line(testPredictPlot, color_discrete_sequence=["#f34b3a"]).select_traces())
    )
    fig.update_layout(
        title= isstacked + 'LSTM model prediction' + plotname,
        yaxis_title= stockname, 
        xaxis_title = "N. of observation"
    )
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.update_yaxes(color="black")
    fig.update_xaxes(color="black")
    fig.update_layout(title_font_color="black")

    fig.show()
    
In [16]:
for t in ick_cut:
    lstm_reg(scaler.fit_transform(initial_close_cut[t][['close']].astype('float64')), t)
Metal device set to: Apple M1
Train Score: 1.974 RMSE
Test Score: 1.179 RMSE
Train Score: 0.050 MAPE
Test Score: 0.032 MAPE
Train Score: 1.982 RMSE
Test Score: 1.414 RMSE
Train Score: 0.052 MAPE
Test Score: 0.038 MAPE
Train Score: 2.113 RMSE
Test Score: 2.982 RMSE
Train Score: 0.056 MAPE
Test Score: 0.077 MAPE
Train Score: 2.889 RMSE
Test Score: 2.035 RMSE
Train Score: 0.076 MAPE
Test Score: 0.056 MAPE
Train Score: 0.230 RMSE
Test Score: 0.089 RMSE
Train Score: 0.069 MAPE
Test Score: 0.031 MAPE
Train Score: 0.233 RMSE
Test Score: 0.297 RMSE
Train Score: 0.082 MAPE
Test Score: 0.111 MAPE
Train Score: 0.204 RMSE
Test Score: 0.158 RMSE
Train Score: 0.073 MAPE
Test Score: 0.061 MAPE
Train Score: 0.197 RMSE
Test Score: 0.208 RMSE
Train Score: 0.068 MAPE
Test Score: 0.080 MAPE
Train Score: 0.244 RMSE
Test Score: 0.135 RMSE
Train Score: 0.054 MAPE
Test Score: 0.033 MAPE
Train Score: 0.221 RMSE
Test Score: 0.139 RMSE
Train Score: 0.051 MAPE
Test Score: 0.033 MAPE
Train Score: 0.263 RMSE
Test Score: 0.225 RMSE
Train Score: 0.060 MAPE
Test Score: 0.059 MAPE
Train Score: 0.235 RMSE
Test Score: 0.199 RMSE
Train Score: 0.058 MAPE
Test Score: 0.050 MAPE
Train Score: 0.039 RMSE
Test Score: 0.032 RMSE
Train Score: 0.047 MAPE
Test Score: 0.045 MAPE
Train Score: 0.035 RMSE
Test Score: 0.045 RMSE
Train Score: 0.041 MAPE
Test Score: 0.055 MAPE
Train Score: 0.042 RMSE
Test Score: 0.023 RMSE
Train Score: 0.046 MAPE
Test Score: 0.028 MAPE
Train Score: 0.034 RMSE
Test Score: 0.021 RMSE
Train Score: 0.041 MAPE
Test Score: 0.026 MAPE
In [17]:
for t in ick_cut:
    lstm_reg(scaler.fit_transform(initial_close_cut[t][['close']].astype('float64')), t, 3, " with Window Method")
Train Score: 1.945 RMSE
Test Score: 1.289 RMSE
Train Score: 0.049 MAPE
Test Score: 0.035 MAPE
Train Score: 2.057 RMSE
Test Score: 1.640 RMSE
Train Score: 0.053 MAPE
Test Score: 0.045 MAPE
Train Score: 2.223 RMSE
Test Score: 3.216 RMSE
Train Score: 0.060 MAPE
Test Score: 0.086 MAPE
Train Score: 2.787 RMSE
Test Score: 2.291 RMSE
Train Score: 0.073 MAPE
Test Score: 0.066 MAPE
Train Score: 0.218 RMSE
Test Score: 0.088 RMSE
Train Score: 0.067 MAPE
Test Score: 0.031 MAPE
Train Score: 0.215 RMSE
Test Score: 0.548 RMSE
Train Score: 0.075 MAPE
Test Score: 0.221 MAPE
Train Score: 0.196 RMSE
Test Score: 0.158 RMSE
Train Score: 0.070 MAPE
Test Score: 0.062 MAPE
Train Score: 0.199 RMSE
Test Score: 0.239 RMSE
Train Score: 0.072 MAPE
Test Score: 0.094 MAPE
Train Score: 0.245 RMSE
Test Score: 0.157 RMSE
Train Score: 0.057 MAPE
Test Score: 0.039 MAPE
Train Score: 0.221 RMSE
Test Score: 0.153 RMSE
Train Score: 0.051 MAPE
Test Score: 0.038 MAPE
Train Score: 0.260 RMSE
Test Score: 0.255 RMSE
Train Score: 0.061 MAPE
Test Score: 0.067 MAPE
Train Score: 0.237 RMSE
Test Score: 0.204 RMSE
Train Score: 0.059 MAPE
Test Score: 0.051 MAPE
Train Score: 0.035 RMSE
Test Score: 0.033 RMSE
Train Score: 0.043 MAPE
Test Score: 0.046 MAPE
Train Score: 0.038 RMSE
Test Score: 0.058 RMSE
Train Score: 0.047 MAPE
Test Score: 0.065 MAPE
Train Score: 0.040 RMSE
Test Score: 0.023 RMSE
Train Score: 0.045 MAPE
Test Score: 0.028 MAPE
Train Score: 0.033 RMSE
Test Score: 0.025 RMSE
Train Score: 0.040 MAPE
Test Score: 0.031 MAPE
In [18]:
for t in ick_cut:
    lstm_reg(scaler.fit_transform(initial_close_cut[t][['close']].astype('float64')), t, 3, " with Time Steps")
Train Score: 1.997 RMSE
Test Score: 1.364 RMSE
Train Score: 0.050 MAPE
Test Score: 0.037 MAPE
Train Score: 2.058 RMSE
Test Score: 1.371 RMSE
Train Score: 0.054 MAPE
Test Score: 0.037 MAPE
Train Score: 2.222 RMSE
Test Score: 3.395 RMSE
Train Score: 0.061 MAPE
Test Score: 0.091 MAPE
Train Score: 2.791 RMSE
Test Score: 2.010 RMSE
Train Score: 0.075 MAPE
Test Score: 0.056 MAPE
Train Score: 0.218 RMSE
Test Score: 0.088 RMSE
Train Score: 0.067 MAPE
Test Score: 0.031 MAPE
Train Score: 0.221 RMSE
Test Score: 0.327 RMSE
Train Score: 0.079 MAPE
Test Score: 0.129 MAPE
Train Score: 0.203 RMSE
Test Score: 0.166 RMSE
Train Score: 0.072 MAPE
Test Score: 0.064 MAPE
Train Score: 0.206 RMSE
Test Score: 0.136 RMSE
Train Score: 0.071 MAPE
Test Score: 0.054 MAPE
Train Score: 0.247 RMSE
Test Score: 0.168 RMSE
Train Score: 0.055 MAPE
Test Score: 0.041 MAPE
Train Score: 0.221 RMSE
Test Score: 0.141 RMSE
Train Score: 0.051 MAPE
Test Score: 0.035 MAPE
Train Score: 0.260 RMSE
Test Score: 0.249 RMSE
Train Score: 0.060 MAPE
Test Score: 0.064 MAPE
Train Score: 0.241 RMSE
Test Score: 0.229 RMSE
Train Score: 0.060 MAPE
Test Score: 0.058 MAPE
Train Score: 0.037 RMSE
Test Score: 0.036 RMSE
Train Score: 0.048 MAPE
Test Score: 0.051 MAPE
Train Score: 0.036 RMSE
Test Score: 0.070 RMSE
Train Score: 0.043 MAPE
Test Score: 0.091 MAPE
Train Score: 0.039 RMSE
Test Score: 0.023 RMSE
Train Score: 0.044 MAPE
Test Score: 0.029 MAPE
Train Score: 0.035 RMSE
Test Score: 0.028 RMSE
Train Score: 0.042 MAPE
Test Score: 0.036 MAPE
In [19]:
for t in ick_cut:
    lstm_reg(scaler.fit_transform(initial_close_cut[t][['close']].astype('float64')), t, 3, " with Memory Between Batches")
Train Score: 2.498 RMSE
Test Score: 1.992 RMSE
Train Score: 0.067 MAPE
Test Score: 0.053 MAPE
Train Score: 2.265 RMSE
Test Score: 1.927 RMSE
Train Score: 0.059 MAPE
Test Score: 0.055 MAPE
Train Score: 3.055 RMSE
Test Score: 3.417 RMSE
Train Score: 0.088 MAPE
Test Score: 0.096 MAPE
Train Score: 3.402 RMSE
Test Score: 3.985 RMSE
Train Score: 0.089 MAPE
Test Score: 0.128 MAPE
Train Score: 0.219 RMSE
Test Score: 0.093 RMSE
Train Score: 0.068 MAPE
Test Score: 0.032 MAPE
Train Score: 0.281 RMSE
Test Score: 0.688 RMSE
Train Score: 0.091 MAPE
Test Score: 0.292 MAPE
Train Score: 0.285 RMSE
Test Score: 0.158 RMSE
Train Score: 0.108 MAPE
Test Score: 0.061 MAPE
Train Score: 0.310 RMSE
Test Score: 0.498 RMSE
Train Score: 0.134 MAPE
Test Score: 0.209 MAPE
Train Score: 0.303 RMSE
Test Score: 0.223 RMSE
Train Score: 0.065 MAPE
Test Score: 0.057 MAPE
Train Score: 0.320 RMSE
Test Score: 0.172 RMSE
Train Score: 0.071 MAPE
Test Score: 0.043 MAPE
Train Score: 0.262 RMSE
Test Score: 0.224 RMSE
Train Score: 0.062 MAPE
Test Score: 0.057 MAPE
Train Score: 0.293 RMSE
Test Score: 0.312 RMSE
Train Score: 0.078 MAPE
Test Score: 0.083 MAPE
Train Score: 0.035 RMSE
Test Score: 0.061 RMSE
Train Score: 0.041 MAPE
Test Score: 0.062 MAPE
Train Score: 0.053 RMSE
Test Score: 0.065 RMSE
Train Score: 0.068 MAPE
Test Score: 0.077 MAPE
Train Score: 0.050 RMSE
Test Score: 0.045 RMSE
Train Score: 0.058 MAPE
Test Score: 0.062 MAPE
Train Score: 0.044 RMSE
Test Score: 0.037 RMSE
Train Score: 0.053 MAPE
Test Score: 0.048 MAPE
In [20]:
for t in ick_cut:
    lstm_reg(scaler.fit_transform(initial_close_cut[t][['close']].astype('float64')), t, 2, " with Memory Between Batches", "Stacked ")
Train Score: 2.610 RMSE
Test Score: 2.916 RMSE
Train Score: 0.070 MAPE
Test Score: 0.064 MAPE
Train Score: 2.494 RMSE
Test Score: 2.015 RMSE
Train Score: 0.066 MAPE
Test Score: 0.057 MAPE
Train Score: 4.110 RMSE
Test Score: 4.117 RMSE
Train Score: 0.123 MAPE
Test Score: 0.123 MAPE
Train Score: 3.494 RMSE
Test Score: 4.550 RMSE
Train Score: 0.095 MAPE
Test Score: 0.147 MAPE
Train Score: 0.219 RMSE
Test Score: 0.093 RMSE
Train Score: 0.069 MAPE
Test Score: 0.032 MAPE
Train Score: 0.248 RMSE
Test Score: 1.050 RMSE
Train Score: 0.086 MAPE
Test Score: 0.441 MAPE
Train Score: 0.359 RMSE
Test Score: 0.193 RMSE
Train Score: 0.145 MAPE
Test Score: 0.075 MAPE
Train Score: 0.281 RMSE
Test Score: 0.795 RMSE
Train Score: 0.114 MAPE
Test Score: 0.333 MAPE
Train Score: 0.244 RMSE
Test Score: 0.200 RMSE
Train Score: 0.055 MAPE
Test Score: 0.049 MAPE
Train Score: 0.333 RMSE
Test Score: 0.279 RMSE
Train Score: 0.078 MAPE
Test Score: 0.072 MAPE
Train Score: 0.264 RMSE
Test Score: 0.240 RMSE
Train Score: 0.062 MAPE
Test Score: 0.061 MAPE
Train Score: 0.253 RMSE
Test Score: 0.658 RMSE
Train Score: 0.064 MAPE
Test Score: 0.156 MAPE
Train Score: 0.035 RMSE
Test Score: 0.118 RMSE
Train Score: 0.043 MAPE
Test Score: 0.176 MAPE
Train Score: 0.045 RMSE
Test Score: 0.092 RMSE
Train Score: 0.057 MAPE
Test Score: 0.097 MAPE
Train Score: 0.058 RMSE
Test Score: 0.067 RMSE
Train Score: 0.070 MAPE
Test Score: 0.092 MAPE
Train Score: 0.042 RMSE
Test Score: 0.075 RMSE
Train Score: 0.051 MAPE
Test Score: 0.102 MAPE

ARIMA

In [21]:
GOOG_arima = pandas.read_csv('GOOG_stock_sample-2/GOOG_1min_sample_cordates.csv', sep=";", header=0, parse_dates=[0], index_col=0, squeeze = True)
GOOG_arima.name = "GOOG"
In [22]:
FDX_arima = pandas.read_csv('FDX_stock_sample/FDX_1min_sample_cordates.csv', sep=";", header=0, parse_dates=[0], index_col=0, squeeze = True)
FDX_arima.name = "FDX"
In [23]:
GS_arima = pandas.read_csv('GS_stock_sample/GS_1min_sample_cordates.csv', sep=";", header=0, parse_dates=[0], index_col=0, squeeze = True)
GS_arima.name = "GS"
In [24]:
KO_arima = pandas.read_csv('KO_stock_sample/KO_1min_sample_cordates.csv', sep=";", header=0, parse_dates=[0], index_col=0, squeeze = True)
KO_arima.name = "KO"
In [25]:
initial_arima = {}
initial_arima['GOOG'] = GOOG_arima
initial_arima['FDX'] = FDX_arima
initial_arima['GS'] = GS_arima
initial_arima['KO'] = KO_arima
iarimak = list(initial_arima.keys())
In [26]:
initial_arima_cut = {}
for t in iarimak:
    for k in range(4,8) :
            keydate = t + " Stock" + " Apr " + str(k)
            initial_arima_cut[keydate] = initial_arima[t].iloc[int(1960/5*(k-4)):int(1960/5*(k-3))]
iarimak_cut = list(initial_arima_cut.keys())
In [27]:
arima_rmse_train = {}
arima_rmse_test = {}
arima_mape_train = {}
arima_mape_test = {}
In [28]:
from statsmodels.tsa.arima.model import ARIMA
from math import sqrt
def arima_reg(dataframe, stockname, plotname = "", isstacked = ""):
    dataframe = pandas.DataFrame(dataframe.iloc[0:392,3])
    dataframe.index = dataframe.index.to_period('M')
    # split into train and test sets
    X = dataframe.values
    size = int(len(X) * 0.6)
    train, test = X[0:size], X[size:len(X)]
    history = [x for x in train]
    predictions = list()
    # walk-forward validation
    for t in range(len(test)):
        model = ARIMA(history, order=(1,0,1))
        model_fit = model.fit()
        output = model_fit.forecast()
        yhat = output[0]
        predictions.append(yhat)
        obs = test[t]
        history.append(obs)
        #print('predicted=%f, expected=%f' % (yhat, obs))
        # evaluate forecasts
    predictions_train = list()
    history_train = list(train)
    for t in range(len(train)):
        model = ARIMA(history_train, order=(1,0,1))
        model_fit = model.fit()
        output = model_fit.forecast()
        yhat = output[0]
        predictions_train.append(yhat)
        obs = train[t]
        history_train.append(obs)
    train_rmse = sqrt(mean_squared_error(train[1:len(train)], numpy.reshape(predictions_train, (235,1))[1:len(train)]))
    test_rmse = sqrt(mean_squared_error(test, predictions))
    trainScore_mape = mean_absolute_percentage_error(train[1:len(train)], numpy.reshape(predictions_train, (235,1))[1:len(train)])*100
    testScore_mape = mean_absolute_percentage_error(test, predictions)*100
    arima_rmse_train[stockname] = train_rmse
    arima_rmse_test[stockname] = test_rmse
    arima_mape_train[stockname] = trainScore_mape 
    arima_mape_test[stockname] = testScore_mape
    print('Train Score: %.3f RMSE' % (train_rmse))
    print('Test Score: %.3f RMSE' % (test_rmse))
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    # plot forecasts against actual outcomes

    testPredictPlot = numpy.empty_like(X)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[len(train):len(X), :] = numpy.reshape(predictions, (157,1))

    trainPredictPlot = numpy.empty_like(X)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[0:len(predictions_train), :] = numpy.reshape(predictions_train, (235,1))


    #plt.plot(trainPredictPlot[1:len(trainPredictPlot)], color="red")
    #plt.plot(testPredictPlot[1:len(testPredictPlot)], color="black")
    #plt.plot(predictionsPredictPlot[1:len(predictionsPredictPlot)], color = "green")
    #plt.plot(predictions_trainPredictPlot[1:len(predictions_trainPredictPlot)], color = "blue")
    #plt.show

    fig = px.line(X[1:len(X)], color_discrete_sequence=["black"])
    fig.add_traces(
        list(px.line(trainPredictPlot[1:len(trainPredictPlot)], color_discrete_sequence=["#409a73"]).select_traces())
    )
    fig.add_traces(
        list(px.line(testPredictPlot[1:len(testPredictPlot)], color_discrete_sequence=["#f34b3a"]).select_traces())
    )
    fig.update_layout(
        title= isstacked + 'ARIMA model prediction' + plotname,
        yaxis_title= stockname, 
        xaxis_title = "N. of observation"
    )
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)

    fig.show()
In [29]:
for t in iarimak_cut:
    arima_reg(initial_arima_cut[t], t)
Train Score: 2.144 RMSE
Test Score: 1.164 RMSE
Train Score: 0.051 MAPE
Test Score: 0.030 MAPE
Train Score: 2.015 RMSE
Test Score: 1.302 RMSE
Train Score: 0.052 MAPE
Test Score: 0.033 MAPE
Train Score: 2.152 RMSE
Test Score: 2.985 RMSE
Train Score: 0.057 MAPE
Test Score: 0.076 MAPE
Train Score: 2.858 RMSE
Test Score: 1.993 RMSE
Train Score: 0.073 MAPE
Test Score: 0.053 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

Train Score: 0.236 RMSE
Test Score: 0.084 RMSE
Train Score: 0.069 MAPE
Test Score: 0.029 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

Train Score: 0.214 RMSE
Test Score: 0.124 RMSE
Train Score: 0.075 MAPE
Test Score: 0.046 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

Train Score: 0.200 RMSE
Test Score: 0.166 RMSE
Train Score: 0.071 MAPE
Test Score: 0.064 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

Train Score: 0.202 RMSE
Test Score: 0.136 RMSE
Train Score: 0.069 MAPE
Test Score: 0.051 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

Train Score: 0.247 RMSE
Test Score: 0.134 RMSE
Train Score: 0.055 MAPE
Test Score: 0.032 MAPE
Train Score: 0.221 RMSE
Test Score: 0.136 RMSE
Train Score: 0.051 MAPE
Test Score: 0.033 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

Train Score: 0.270 RMSE
Test Score: 0.219 RMSE
Train Score: 0.060 MAPE
Test Score: 0.055 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

Train Score: 0.238 RMSE
Test Score: 0.186 RMSE
Train Score: 0.059 MAPE
Test Score: 0.046 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

Train Score: 0.036 RMSE
Test Score: 0.018 RMSE
Train Score: 0.041 MAPE
Test Score: 0.021 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

Train Score: 0.036 RMSE
Test Score: 0.025 RMSE
Train Score: 0.041 MAPE
Test Score: 0.029 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

Train Score: 0.042 RMSE
Test Score: 0.022 RMSE
Train Score: 0.047 MAPE
Test Score: 0.028 MAPE
/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

/Users/mac/miniconda3/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning:

Maximum Likelihood optimization failed to converge. Check mle_retvals

Train Score: 0.034 RMSE
Test Score: 0.021 RMSE
Train Score: 0.041 MAPE
Test Score: 0.025 MAPE

CNN

In [30]:
initial_close_cut_xgboost = {}
for t in ik_cut:
    initial_close_cut_xgboost[t] = pandas.DataFrame(initial_cut[t].set_index('date').iloc[0:392,3])
    initial_close_cut_xgboost[t].name = str(t)
iccutxgbk = list(initial_close_cut_xgboost.keys())
In [31]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt


from keras.models import Sequential
from keras.layers import Dense,RepeatVector
from keras.layers import Flatten
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D



from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
In [32]:
cnn_rmse_train = {}
cnn_rmse_test = {}
cnn_mape_train = {}
cnn_mape_test = {}
In [33]:
scaler = MinMaxScaler(feature_range=(-1,1))
def cnn_reg(dataset, stockname):
    df = dataset
    df_1=df.values
    df_1=df_1.astype('float64')
    numpy.random.seed(7)
    scaler = MinMaxScaler(feature_range=(-1,1))
    ts = scaler.fit_transform(df_1)
    timestep = 10

    X= []
    Y=[]

    raw_data=ts

    for i in range(len(raw_data)- (timestep)):
        X.append(raw_data[i:i+timestep])
        Y.append(raw_data[i+timestep])


    X=np.asanyarray(X)
    Y=np.asanyarray(Y)


    k = 234
    Xtrain = X[:k,:,:]  
    Ytrain = Y[:k] 
    numpy.random.seed(7)
    model = Sequential()
    model.add(Conv1D(filters=128, kernel_size=2, activation='relu', input_shape=(10, 1)))
    model.add(Conv1D(filters=128, kernel_size=2, activation='relu'))
    model.add(Conv1D(filters=128, kernel_size=2, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    # fit model
    model.fit(Xtrain, Ytrain, epochs=100, verbose=0)
    Xtest = X[k:,:,:]  
    Ytest= Y[k:] 
    preds = model.predict(Xtest)
    preds = scaler.inverse_transform(preds)

    Ytest=np.asanyarray(Ytest)  
    Ytest=Ytest.reshape(-1,1) 
    Ytest = scaler.inverse_transform(Ytest)


    Ytrain=np.asanyarray(Ytrain)  
    Ytrain=Ytrain.reshape(-1,1) 
    Ytrain = scaler.inverse_transform(Ytrain)
    preds_train = model.predict(Xtrain)
    preds_train = scaler.inverse_transform(preds_train)
    trainScore = sqrt(mean_squared_error(Ytrain,preds_train))
    testScore = sqrt(mean_squared_error(Ytest,preds))
    trainScore_mape = mean_absolute_percentage_error(Ytrain,preds_train)*100
    testScore_mape = mean_absolute_percentage_error(Ytest,preds)*100
    cnn_rmse_train[stockname] = trainScore
    cnn_rmse_test[stockname] = testScore
    cnn_mape_train[stockname] = trainScore_mape 
    cnn_mape_test[stockname] = testScore_mape
    testPredictPlot = numpy.empty_like(raw_data)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[len(Ytrain):len(raw_data)- (timestep), :] = numpy.reshape(preds, (148,1))

    testPlot = numpy.empty_like(raw_data)
    testPlot[:, :] = numpy.nan
    testPlot[len(Ytrain):len(raw_data)- (timestep), :] = numpy.reshape(Ytest, (148,1))
    
    print('Train Score: %.3f RMSE' % trainScore)
    print('Test Score: %.3f RMSE' % testScore)
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    fig = px.line(Ytrain, color_discrete_sequence=["black"])
    fig.add_traces(list(px.line(testPlot, color_discrete_sequence=["black"]).select_traces()))
    fig.add_traces(list(px.line(preds_train, color_discrete_sequence=["#409a73"]).select_traces()))
    
    fig.add_traces(list(px.line(testPredictPlot, color_discrete_sequence=["#f34b3a"]).select_traces()))
    
    #fig.add_traces(list(px.line(test, color_discrete_sequence=["blue"]).select_traces()))
    #fig.add_traces(list(px.line(predictions, color_discrete_sequence=["green"]).select_traces()))
    fig.update_layout(title= 'CNN model prediction',yaxis_title= stockname, xaxis_title = "N. of observation")
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.show()
In [34]:
for t in iccutxgbk:
    cnn_reg(initial_close_cut_xgboost[t], t)
Train Score: 2.098 RMSE
Test Score: 2.478 RMSE
Train Score: 0.058 MAPE
Test Score: 0.067 MAPE
Train Score: 1.468 RMSE
Test Score: 2.194 RMSE
Train Score: 0.038 MAPE
Test Score: 0.062 MAPE
Train Score: 1.373 RMSE
Test Score: 5.085 RMSE
Train Score: 0.037 MAPE
Test Score: 0.144 MAPE
Train Score: 1.557 RMSE
Test Score: 5.133 RMSE
Train Score: 0.041 MAPE
Test Score: 0.156 MAPE
Train Score: 0.084 RMSE
Test Score: 0.137 RMSE
Train Score: 0.030 MAPE
Test Score: 0.048 MAPE
Train Score: 0.150 RMSE
Test Score: 0.270 RMSE
Train Score: 0.054 MAPE
Test Score: 0.111 MAPE
Train Score: 0.126 RMSE
Test Score: 0.298 RMSE
Train Score: 0.048 MAPE
Test Score: 0.115 MAPE
Train Score: 0.163 RMSE
Test Score: 1.086 RMSE
Train Score: 0.060 MAPE
Test Score: 0.468 MAPE
Train Score: 0.160 RMSE
Test Score: 0.311 RMSE
Train Score: 0.035 MAPE
Test Score: 0.075 MAPE
Train Score: 0.156 RMSE
Test Score: 0.251 RMSE
Train Score: 0.037 MAPE
Test Score: 0.061 MAPE
Train Score: 0.069 RMSE
Test Score: 0.472 RMSE
Train Score: 0.015 MAPE
Test Score: 0.118 MAPE
Train Score: 0.212 RMSE
Test Score: 0.709 RMSE
Train Score: 0.054 MAPE
Test Score: 0.176 MAPE
Train Score: 0.023 RMSE
Test Score: 0.030 RMSE
Train Score: 0.026 MAPE
Test Score: 0.038 MAPE
Train Score: 0.023 RMSE
Test Score: 0.131 RMSE
Train Score: 0.027 MAPE
Test Score: 0.154 MAPE
Train Score: 0.024 RMSE
Test Score: 0.040 RMSE
Train Score: 0.028 MAPE
Test Score: 0.050 MAPE
Train Score: 0.024 RMSE
Test Score: 0.078 RMSE
Train Score: 0.028 MAPE
Test Score: 0.107 MAPE

XGBoost

In [35]:
from numpy import loadtxt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from math import sqrt
from numpy import asarray
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
from matplotlib import pyplot

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
In [36]:
xgb_rmse_train = {}
xgb_rmse_test = {}
xgb_mape_train = {}
xgb_mape_test = {}
In [37]:
# transform a time series dataset into a supervised learning dataset
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = DataFrame(data)
    cols = list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
    # put it all together
    agg = concat(cols, axis=1)
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg.values

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    return data[:-n_test, :], data[-n_test:, :]

# fit an xgboost model and make a one step prediction
def xgboost_forecast(train, testX):
    # transform list into array
    train = asarray(train)
    # split into input and output columns
    trainX, trainy = train[:, :-1], train[:, -1]
    # fit model
    model = XGBRegressor(objective='reg:squarederror', n_estimators=30)
    model.fit(trainX, trainy)
    # make a one-step prediction
    yhat = model.predict(asarray([testX]))
    return yhat[0]

# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, myset, stockname):
    predictions = list()
    # split dataset
    train, test = train_test_split(data, n_test)
    # seed history with training dataset
    history = [x for x in train]
    # step over each time-step in the test set
    for i in range(len(test)):
        # split test row into input and output columns
        testX, testy = test[i, :-1], test[i, -1]
        # fit model on history and make a prediction
        yhat = xgboost_forecast(history, testX)
        # store forecast in list of predictions
        predictions.append(yhat)
        # add actual observation to history for the next loop
        # summarize progress
    # estimate prediction error
    predictions_train = list()
    history_train = list(train)
    for i in range(len(train)):
        trainX, trainy = train[i, :-1], train[i, -1]
        # fit model on history and make a prediction
        yhat_train = xgboost_forecast(history_train, trainX)
        predictions_train.append(yhat_train)
        # add actual observation to history for the next loop
        history_train.append(train[i])
    trainScore = sqrt(mean_squared_error(train[1:len(train), -1], numpy.array(predictions_train[1:len(predictions_train)])))
    testScore = sqrt(mean_squared_error(test[:, -1], predictions))
    trainScore_mape = mean_absolute_percentage_error(train[1:len(train), -1], numpy.array(predictions_train[1:len(predictions_train)]))*100
    testScore_mape = mean_absolute_percentage_error(test[:, -1], predictions)*100
    xgb_rmse_train[stockname] = trainScore
    xgb_rmse_test[stockname] = testScore
    xgb_mape_train[stockname] = trainScore_mape 
    xgb_mape_test[stockname] = testScore_mape
    testPredictPlot = numpy.empty_like(myset)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[len(train)+10:len(myset), :] = numpy.reshape(predictions, (157,1))
    trainPredictPlot = numpy.empty_like(myset)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[0:len(predictions_train), :] = numpy.reshape(predictions_train, (225,1))
    PredictPlot = numpy.empty_like(myset)
    PredictPlot[:, :] = numpy.nan
    PredictPlot[225:len(myset)-10, :] = numpy.reshape(myset[225:len(myset)-10], (157,1))
    #return error, testPredictPlot, error_train, trainPredictPlot, PredictPlot
    print('Train Score: %.3f RMSE' % trainScore)
    print('Test Score: %.3f RMSE' % testScore)
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    fig = px.line(train[1:len(train), -1], color_discrete_sequence=["black"])
    fig.add_traces(list(px.line(PredictPlot, color_discrete_sequence=["black"]).select_traces()))
    fig.add_traces(list(px.line(trainPredictPlot, color_discrete_sequence=["#409a73"]).select_traces()))
    
    fig.add_traces(list(px.line(testPredictPlot, color_discrete_sequence=["#f34b3a"]).select_traces()))
    
    #fig.add_traces(list(px.line(test, color_discrete_sequence=["blue"]).select_traces()))
    #fig.add_traces(list(px.line(predictions, color_discrete_sequence=["green"]).select_traces()))
    fig.update_layout(title= 'XGBoost model prediction',yaxis_title= stockname, xaxis_title = "N. of observation")
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.show()
In [38]:
def xgboost_reg(dataset):
    data = series_to_supervised(dataset, n_in=10)
    walk_forward_validation(data, 157, dataset.values, dataset.name)
In [39]:
for t in iccutxgbk:
    xgboost_reg(initial_close_cut_xgboost[t])
Train Score: 0.627 RMSE
Test Score: 1.837 RMSE
Train Score: 0.015 MAPE
Test Score: 0.049 MAPE
Train Score: 0.759 RMSE
Test Score: 1.945 RMSE
Train Score: 0.020 MAPE
Test Score: 0.053 MAPE
Train Score: 0.787 RMSE
Test Score: 4.294 RMSE
Train Score: 0.020 MAPE
Test Score: 0.122 MAPE
Train Score: 0.993 RMSE
Test Score: 3.873 RMSE
Train Score: 0.026 MAPE
Test Score: 0.112 MAPE
Train Score: 0.060 RMSE
Test Score: 0.093 RMSE
Train Score: 0.021 MAPE
Test Score: 0.032 MAPE
Train Score: 0.083 RMSE
Test Score: 1.494 RMSE
Train Score: 0.029 MAPE
Test Score: 0.594 MAPE
Train Score: 0.072 RMSE
Test Score: 0.213 RMSE
Train Score: 0.026 MAPE
Test Score: 0.082 MAPE
Train Score: 0.081 RMSE
Test Score: 1.027 RMSE
Train Score: 0.029 MAPE
Test Score: 0.421 MAPE
Train Score: 0.084 RMSE
Test Score: 0.229 RMSE
Train Score: 0.019 MAPE
Test Score: 0.055 MAPE
Train Score: 0.082 RMSE
Test Score: 0.208 RMSE
Train Score: 0.019 MAPE
Test Score: 0.053 MAPE
Train Score: 0.082 RMSE
Test Score: 0.352 RMSE
Train Score: 0.018 MAPE
Test Score: 0.092 MAPE
Train Score: 0.087 RMSE
Test Score: 0.611 RMSE
Train Score: 0.022 MAPE
Test Score: 0.148 MAPE
Train Score: 0.014 RMSE
Test Score: 0.065 RMSE
Train Score: 0.016 MAPE
Test Score: 0.089 MAPE
Train Score: 0.014 RMSE
Test Score: 0.130 RMSE
Train Score: 0.017 MAPE
Test Score: 0.130 MAPE
Train Score: 0.015 RMSE
Test Score: 0.033 RMSE
Train Score: 0.017 MAPE
Test Score: 0.042 MAPE
Train Score: 0.014 RMSE
Test Score: 0.180 RMSE
Train Score: 0.016 MAPE
Test Score: 0.247 MAPE

GRU

In [40]:
# load libraries
import pandas, time
import numpy as np
from keras.layers.recurrent import GRU
from keras.layers.core import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop
from keras.models import Sequential
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler 
In [41]:
gru_rmse_train = {}
gru_rmse_test = {}
gru_mape_train = {}
gru_mape_test = {}
In [42]:
from keras.models import clone_model
from keras.losses import MeanAbsolutePercentageError
In [43]:
def gru_reg(dataset, stockname):    

    # load the dataset

    # normalize the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)

    # split into train and test sets
    train_size = int(len(dataset) * 0.6) 
    train_dataset, test_dataset = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

    # Window -> X timestep back
    step_back = 1
    X_train, Y_train = [], []
    for i in range(len(train_dataset)-step_back - 1):
        a = train_dataset[i:(i+step_back), 0]
        X_train.append(a)
        Y_train.append(train_dataset[i + step_back, 0])
    X_train = np.array(X_train); Y_train = np.array(Y_train);
    
    X_test, Y_test = [], []
    for i in range(len(test_dataset)-step_back - 1):
        a = test_dataset[i:(i+step_back), 0]
        X_test.append(a)
        Y_test.append(test_dataset[i + step_back, 0])
    X_test = np.array(X_test); Y_test = np.array(Y_test);

    X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
    X_test  = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

    model = Sequential()
    model.add(GRU(32, input_shape=(1, step_back)))
    model.add(Dense(units = 512, activation = 'relu'))
    model.add(Dropout(0.2))
    model.add(Dense(units = 1, activation = 'linear'))
    model.compile(loss='mean_squared_error', optimizer=RMSprop(lr = 0.01))
    model.fit(X_train, Y_train, epochs=20, batch_size=2, verbose=0) 

    # Estimate model performance


    # Evaluate the skill of the Trained model
    trainPredict = model.predict(X_train)
    testPredict  = model.predict(X_test)
    

    # invert predictions
    trainPredict = scaler.inverse_transform(trainPredict)
    testPredict = scaler.inverse_transform(testPredict)
    Y_train = scaler.inverse_transform(Y_train.reshape(233,1))
    Y_test = scaler.inverse_transform(Y_test.reshape(155,1))
    
    trainScore = sqrt(mean_absolute_error(Y_train, trainPredict))
    testScore = sqrt(mean_absolute_error(Y_test, testPredict)) 
    trainScore_mape = mean_absolute_percentage_error(Y_train, trainPredict)*100
    testScore_mape = mean_absolute_percentage_error(Y_test, testPredict)*100
    print('Train Score: %.3f RMSE' % trainScore)
    print('Test Score: %.3f RMSE' % testScore)
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    
    gru_rmse_train[stockname] = trainScore
    gru_rmse_test[stockname] = testScore
    gru_mape_train[stockname] = trainScore_mape 
    gru_mape_test[stockname] = testScore_mape
    
    # shift train predictions for plotting
    trainPredictPlot = np.empty_like(dataset)
    trainPredictPlot[:, :] = np.nan
    trainPredictPlot[step_back:len(trainPredict)+step_back, :] = trainPredict

    # shift test predictions for plotting
    testPredictPlot = np.empty_like(dataset)
    testPredictPlot[:, :] = np.nan
    testPredictPlot[len(trainPredict)+(step_back*2)+1:len(dataset)-1, :] = testPredict
    fig = px.line(scaler.inverse_transform(dataset), color_discrete_sequence=["black"])
    fig.add_traces(list(px.line(trainPredictPlot, color_discrete_sequence=["#409a73"]).select_traces()))
    
    fig.add_traces(list(px.line(testPredictPlot, color_discrete_sequence=["#f34b3a"]).select_traces()))
    
    #fig.add_traces(list(px.line(test, color_discrete_sequence=["blue"]).select_traces()))
    #fig.add_traces(list(px.line(predictions, color_discrete_sequence=["green"]).select_traces()))
    fig.update_layout(title= 'GRU model prediction',yaxis_title= stockname, xaxis_title = "N. of observation")
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.show()
In [44]:
for t in iccutxgbk:
    gru_reg(initial_close_cut_xgboost[t], t)
Train Score: 1.314 RMSE
Test Score: 1.164 RMSE
Train Score: 0.060 MAPE
Test Score: 0.047 MAPE
Train Score: 1.650 RMSE
Test Score: 1.385 RMSE
Train Score: 0.096 MAPE
Test Score: 0.068 MAPE
Train Score: 1.594 RMSE
Test Score: 1.576 RMSE
Train Score: 0.092 MAPE
Test Score: 0.090 MAPE
Train Score: 1.484 RMSE
Test Score: 1.708 RMSE
Train Score: 0.081 MAPE
Test Score: 0.107 MAPE
Train Score: 0.403 RMSE
Test Score: 0.271 RMSE
Train Score: 0.074 MAPE
Test Score: 0.034 MAPE
Train Score: 0.452 RMSE
Test Score: 0.855 RMSE
Train Score: 0.095 MAPE
Test Score: 0.345 MAPE
Train Score: 0.638 RMSE
Test Score: 0.397 RMSE
Train Score: 0.197 MAPE
Test Score: 0.077 MAPE
Train Score: 0.457 RMSE
Test Score: 0.815 RMSE
Train Score: 0.104 MAPE
Test Score: 0.326 MAPE
Train Score: 0.624 RMSE
Test Score: 0.529 RMSE
Train Score: 0.119 MAPE
Test Score: 0.085 MAPE
Train Score: 0.592 RMSE
Test Score: 0.410 RMSE
Train Score: 0.107 MAPE
Test Score: 0.052 MAPE
Train Score: 0.554 RMSE
Test Score: 0.665 RMSE
Train Score: 0.096 MAPE
Test Score: 0.140 MAPE
Train Score: 0.573 RMSE
Test Score: 0.568 RMSE
Train Score: 0.106 MAPE
Test Score: 0.103 MAPE
Train Score: 0.234 RMSE
Test Score: 0.340 RMSE
Train Score: 0.088 MAPE
Test Score: 0.185 MAPE
Train Score: 0.259 RMSE
Test Score: 0.176 RMSE
Train Score: 0.106 MAPE
Test Score: 0.049 MAPE
Train Score: 0.244 RMSE
Test Score: 0.223 RMSE
Train Score: 0.095 MAPE
Test Score: 0.079 MAPE
Train Score: 0.181 RMSE
Test Score: 0.268 RMSE
Train Score: 0.052 MAPE
Test Score: 0.113 MAPE

Gradient Boosting

In [45]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from lightgbm import LGBMRegressor
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf
from scipy.stats import t
from sklearn.model_selection import TimeSeriesSplit, train_test_split
from sklearn.metrics import mean_absolute_error
In [46]:
gb_rmse_train = {}
gb_rmse_test = {}
gb_mape_train = {}
gb_mape_test = {}
In [47]:
def gradient_boost_reg(dataset, stockname):


    df = pandas.DataFrame(dataset.iloc[0:392,:3])

    horizon=int(len(df)*0.4)+1
    X = df.drop('open', axis=1)
    y = df['open']

    #take last week of the dataset for validation
    X_train, X_test = X.iloc[:-horizon,:], X.iloc[-horizon:,:]
    y_train, y_test = y.iloc[:-horizon], y.iloc[-horizon:]


    #create, train and do inference of the model
    model = LGBMRegressor(random_state=7)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    #calculate RMSE  
    model.fit(X_train, y_train)
    predictions_train = model.predict(X_train)
    trainScore = sqrt(mean_absolute_error(y_train, predictions_train))
    testScore = sqrt(mean_absolute_error(y_test, predictions)) 
    trainScore_mape = mean_absolute_percentage_error(y_train, predictions_train)*100
    testScore_mape = mean_absolute_percentage_error(y_test, predictions)*100
    gb_rmse_train[stockname] = trainScore
    gb_rmse_test[stockname] = testScore
    gb_mape_train[stockname] = trainScore_mape 
    gb_mape_test[stockname] = testScore_mape

    print('Train Score: %.3f RMSE' % trainScore)
    print('Test Score: %.3f RMSE' % testScore)
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    
    testPlot = np.empty_like(df)
    testPlot[:, :] = np.nan
    testPlot[len(numpy.array(y_train)):len(df), :] = numpy.reshape(numpy.array(y_test), (157,1))
    
    testPredictPlot = np.empty_like(df)
    testPredictPlot[:, :] = np.nan
    testPredictPlot[len(numpy.array(y_train)):len(df), :] = numpy.reshape(predictions, (157,1))
    

    #plot reality vs prediction for the last week of the dataset
    fig = px.line(testPlot, color_discrete_sequence=["black"])
    fig.add_traces(list(px.line(numpy.array(y_train), color_discrete_sequence=["black"]).select_traces()))
    fig.add_traces(list(px.line(predictions_train, color_discrete_sequence=["#409a73"]).select_traces()))
    
    fig.add_traces(list(px.line(testPredictPlot, color_discrete_sequence=["#f34b3a"]).select_traces()))
    
    #fig.add_traces(list(px.line(test, color_discrete_sequence=["blue"]).select_traces()))
    #fig.add_traces(list(px.line(predictions, color_discrete_sequence=["green"]).select_traces()))
    fig.update_layout(title= 'Gradient Boosting model prediction',yaxis_title= stockname, xaxis_title = "N. of observation")
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.show()
    
In [48]:
for t in iarimak_cut[0:4]:
    gradient_boost_reg(initial_arima_cut[t], t)
Train Score: 1.223 RMSE
Test Score: 1.216 RMSE
Train Score: 0.052 MAPE
Test Score: 0.052 MAPE
Train Score: 0.795 RMSE
Test Score: 1.108 RMSE
Train Score: 0.022 MAPE
Test Score: 0.043 MAPE
Train Score: 1.061 RMSE
Test Score: 1.452 RMSE
Train Score: 0.041 MAPE
Test Score: 0.077 MAPE
Train Score: 0.919 RMSE
Test Score: 1.111 RMSE
Train Score: 0.031 MAPE
Test Score: 0.045 MAPE
In [49]:
for t in iarimak_cut[4:8]:
    gradient_boost_reg(initial_arima_cut[t], t)
Train Score: 0.305 RMSE
Test Score: 0.207 RMSE
Train Score: 0.043 MAPE
Test Score: 0.020 MAPE
Train Score: 0.305 RMSE
Test Score: 1.104 RMSE
Train Score: 0.043 MAPE
Test Score: 0.576 MAPE
Train Score: 0.278 RMSE
Test Score: 0.290 RMSE
Train Score: 0.038 MAPE
Test Score: 0.041 MAPE
Train Score: 0.325 RMSE
Test Score: 0.725 RMSE
Train Score: 0.052 MAPE
Test Score: 0.257 MAPE
In [50]:
for t in iarimak_cut[8:12]:
    gradient_boost_reg(initial_arima_cut[t], t)
Train Score: 0.339 RMSE
Test Score: 0.275 RMSE
Train Score: 0.035 MAPE
Test Score: 0.023 MAPE
Train Score: 0.329 RMSE
Test Score: 0.334 RMSE
Train Score: 0.033 MAPE
Test Score: 0.034 MAPE
Train Score: 0.324 RMSE
Test Score: 0.500 RMSE
Train Score: 0.033 MAPE
Test Score: 0.079 MAPE
Train Score: 0.359 RMSE
Test Score: 0.689 RMSE
Train Score: 0.041 MAPE
Test Score: 0.151 MAPE
In [51]:
for t in iarimak_cut[12:16]:
    gradient_boost_reg(initial_arima_cut[t], t)
Train Score: 0.134 RMSE
Test Score: 0.173 RMSE
Train Score: 0.029 MAPE
Test Score: 0.048 MAPE
Train Score: 0.149 RMSE
Test Score: 0.254 RMSE
Train Score: 0.035 MAPE
Test Score: 0.103 MAPE
Train Score: 0.141 RMSE
Test Score: 0.119 RMSE
Train Score: 0.032 MAPE
Test Score: 0.022 MAPE
Train Score: 0.119 RMSE
Test Score: 0.322 RMSE
Train Score: 0.022 MAPE
Test Score: 0.164 MAPE

Random Forest

In [52]:
from numpy import asarray
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from matplotlib import pyplot
In [53]:
rf_rmse_train = {}
rf_rmse_test = {}
rf_mape_train = {}
rf_mape_test = {}
In [54]:
# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    return data[:-n_test, :], data[-n_test:, :]

# walk-forward validation for univariate data
def walk_forward_validation_rf(data, n_test, myset, stockname):
    predictions = list()
    # split dataset
    train, test = train_test_split(data, n_test)
    # seed history with training dataset
    history = [x for x in train]
    # step over each time-step in the test set
    for i in range(len(test)):
        # split test row into input and output columns
        testX, testy = test[i, :-1], test[i, -1]
        # fit model on history and make a prediction
        yhat = random_forest_forecast(history, testX)
        # store forecast in list of predictions
        predictions.append(yhat)
        # add actual observation to history for the next loop
        history.append(test[i])
    # estimate prediction error
    predictions_train = list()
    history_train = list(train)
    for i in range(len(train)):
        trainX, trainy = train[i, :-1], train[i, -1]
        # fit model on history and make a prediction
        yhat_train = random_forest_forecast(history_train, trainX)
        predictions_train.append(yhat_train)
        # add actual observation to history for the next loop
        history_train.append(train[i])
    trainScore = sqrt(mean_squared_error(train[1:len(train), -1], numpy.array(predictions_train[1:len(predictions_train)])))
    testScore = sqrt(mean_squared_error(test[:, -1], predictions))
    trainScore_mape = mean_absolute_percentage_error(train[1:len(train), -1], numpy.array(predictions_train[1:len(predictions_train)]))*100
    testScore_mape = mean_absolute_percentage_error(test[:, -1], predictions)*100
    rf_rmse_train[stockname] = trainScore
    rf_rmse_test[stockname] = testScore
    rf_mape_train[stockname] = trainScore_mape 
    rf_mape_test[stockname] = testScore_mape
    testPredictPlot = numpy.empty_like(myset)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[len(train)+10:len(myset), :] = numpy.reshape(predictions, (157,1))
    trainPredictPlot = numpy.empty_like(myset)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[0:len(predictions_train), :] = numpy.reshape(predictions_train, (225,1))
    PredictPlot = numpy.empty_like(myset)
    PredictPlot[:, :] = numpy.nan
    PredictPlot[225:len(myset)-10, :] = numpy.reshape(myset[225:len(myset)-10], (157,1))
    #return error, testPredictPlot, error_train, trainPredictPlot, PredictPlot
    print('Train Score: %.3f RMSE' % trainScore)
    print('Test Score: %.3f RMSE' % testScore)
    print('Train Score: %.3f MAPE' % (trainScore_mape))
    print('Test Score: %.3f MAPE' % (testScore_mape))
    fig = px.line(train[1:len(train), -1], color_discrete_sequence=["black"])
    fig.add_traces(list(px.line(PredictPlot, color_discrete_sequence=["black"]).select_traces()))
    fig.add_traces(list(px.line(trainPredictPlot, color_discrete_sequence=["#409a73"]).select_traces()))
    
    fig.add_traces(list(px.line(testPredictPlot, color_discrete_sequence=["#f34b3a"]).select_traces()))
    
    #fig.add_traces(list(px.line(test, color_discrete_sequence=["blue"]).select_traces()))
    #fig.add_traces(list(px.line(predictions, color_discrete_sequence=["green"]).select_traces()))
    fig.update_layout(title= 'Random Forest model prediction',yaxis_title= stockname, xaxis_title = "N. of observation")
    fig.update_layout(plot_bgcolor="#E5E4E2")
    fig.update_layout(showlegend=False)
    fig.show()
In [55]:
# fit an random forest model and make a one step prediction
def random_forest_forecast(train, testX):
    # transform list into array
    train = asarray(train)
    # split into input and output columns
    trainX, trainy = train[:, :-1], train[:, -1]
    # fit model
    model = RandomForestRegressor(n_estimators=100)
    model.fit(trainX, trainy)
    # make a one-step prediction
    yhat = model.predict([testX])
    return yhat[0]
In [56]:
def rf_reg(dataset):
    data = series_to_supervised(dataset, n_in=10)
    walk_forward_validation_rf(data, 157, dataset.values, dataset.name)
In [57]:
for t in iccutxgbk:
    rf_reg(initial_close_cut_xgboost[t])
Train Score: 0.790 RMSE
Test Score: 1.352 RMSE
Train Score: 0.018 MAPE
Test Score: 0.036 MAPE
Train Score: 0.862 RMSE
Test Score: 1.510 RMSE
Train Score: 0.022 MAPE
Test Score: 0.040 MAPE
Train Score: 0.816 RMSE
Test Score: 3.490 RMSE
Train Score: 0.022 MAPE
Test Score: 0.088 MAPE
Train Score: 1.004 RMSE
Test Score: 2.412 RMSE
Train Score: 0.027 MAPE
Test Score: 0.067 MAPE
Train Score: 0.081 RMSE
Test Score: 0.085 RMSE
Train Score: 0.025 MAPE
Test Score: 0.029 MAPE
Train Score: 0.086 RMSE
Test Score: 0.158 RMSE
Train Score: 0.030 MAPE
Test Score: 0.059 MAPE
Train Score: 0.075 RMSE
Test Score: 0.179 RMSE
Train Score: 0.027 MAPE
Test Score: 0.070 MAPE
Train Score: 0.068 RMSE
Test Score: 0.177 RMSE
Train Score: 0.025 MAPE
Test Score: 0.068 MAPE
Train Score: 0.093 RMSE
Test Score: 0.162 RMSE
Train Score: 0.021 MAPE
Test Score: 0.038 MAPE
Train Score: 0.089 RMSE
Test Score: 0.160 RMSE
Train Score: 0.020 MAPE
Test Score: 0.038 MAPE
Train Score: 0.092 RMSE
Test Score: 0.241 RMSE
Train Score: 0.022 MAPE
Test Score: 0.062 MAPE
Train Score: 0.091 RMSE
Test Score: 0.233 RMSE
Train Score: 0.023 MAPE
Test Score: 0.059 MAPE
Train Score: 0.013 RMSE
Test Score: 0.019 RMSE
Train Score: 0.015 MAPE
Test Score: 0.024 MAPE
Train Score: 0.013 RMSE
Test Score: 0.032 RMSE
Train Score: 0.016 MAPE
Test Score: 0.037 MAPE
Train Score: 0.016 RMSE
Test Score: 0.025 RMSE
Train Score: 0.017 MAPE
Test Score: 0.032 MAPE
Train Score: 0.012 RMSE
Test Score: 0.025 RMSE
Train Score: 0.015 MAPE
Test Score: 0.031 MAPE
In [58]:
def get_nth_key(dictionary, n=0):
    if n < 0:
        n += len(dictionary)
    for i, key in enumerate(dictionary.keys()):
        if i == n:
            return key
    raise IndexError("dictionary index out of range") 
    
get_nth_key(rf_rmse_train, 0)
Out[58]:
'GOOG Stock Apr 4'
In [59]:
keysright = {}
In [60]:
for k in range (0,4):
    for t in range(k, 15, 4):
        nkey = get_nth_key(rf_rmse_train, t)
        keysright[nkey] = nkey
    if k == 3 :
        nkey = get_nth_key(rf_rmse_train, 15)
        keysright[nkey] = nkey
In [61]:
keysright
Out[61]:
{'GOOG Stock Apr 4': 'GOOG Stock Apr 4',
 'FDX Stock Apr 4': 'FDX Stock Apr 4',
 'GS Stock Apr 4': 'GS Stock Apr 4',
 'KO Stock Apr 4': 'KO Stock Apr 4',
 'GOOG Stock Apr 5': 'GOOG Stock Apr 5',
 'FDX Stock Apr 5': 'FDX Stock Apr 5',
 'GS Stock Apr 5': 'GS Stock Apr 5',
 'KO Stock Apr 5': 'KO Stock Apr 5',
 'GOOG Stock Apr 6': 'GOOG Stock Apr 6',
 'FDX Stock Apr 6': 'FDX Stock Apr 6',
 'GS Stock Apr 6': 'GS Stock Apr 6',
 'KO Stock Apr 6': 'KO Stock Apr 6',
 'GOOG Stock Apr 7': 'GOOG Stock Apr 7',
 'FDX Stock Apr 7': 'FDX Stock Apr 7',
 'GS Stock Apr 7': 'GS Stock Apr 7',
 'KO Stock Apr 7': 'KO Stock Apr 7'}
In [62]:
dictsordered_rmse_train = {"LSTM":lstm_1_rmse_train,
                           'LSTM with Window Method': lstm_WM_rmse_train,
                           'LSTM with Time Steps': lstm_TS_rmse_train,
                           'LSTM with Memory Between Batches' :lstm_MBB_rmse_train,
                           'Stacked LSTM with Memory Between Batches' :lstm_SMBB_rmse_train,
                          'ARIMA': arima_rmse_train,
                          'CNN': cnn_rmse_train,
                          'GRU': gru_rmse_train,
                          'XGBoost': xgb_rmse_train,
                          'GB': gb_rmse_train,
                          'RF': rf_rmse_train} 
In [63]:
dictsredordered_rmse_train = {}
In [64]:
for t in list(dictsordered_rmse_train.keys()):
    dictsredordered_rmse_train[t] = {k: dictsordered_rmse_train[t][k] for k in keysright}
In [65]:
dataframe_rmse_train = pandas.DataFrame.from_dict(dictsredordered_rmse_train, orient = "index")
In [66]:
dictsordered_rmse_test = {"LSTM":lstm_1_rmse_test,
                           'LSTM with Window Method': lstm_WM_rmse_test,
                           'LSTM with Time Steps': lstm_TS_rmse_test,
                           'LSTM with Memory Between Batches' :lstm_MBB_rmse_test,
                           'Stacked LSTM with Memory Between Batches' :lstm_SMBB_rmse_test,
                          'ARIMA': arima_rmse_test,
                          'CNN': cnn_rmse_test,
                          'GRU': gru_rmse_test,
                          'XGBoost': xgb_rmse_test,
                          'GB': gb_rmse_test,
                          'RF': rf_rmse_test} 
In [67]:
dictsredordered_rmse_test = {}
In [68]:
for t in list(dictsordered_rmse_test.keys()):
    dictsredordered_rmse_test[t] = {k: dictsordered_rmse_test[t][k] for k in keysright}
In [69]:
dataframe_rmse_test = pandas.DataFrame.from_dict(dictsredordered_rmse_test, orient = "index")
In [70]:
dictsordered_mape_train = {"LSTM":lstm_1_mape_train,
                           'LSTM with Window Method': lstm_WM_mape_train,
                           'LSTM with Time Steps': lstm_TS_mape_train,
                           'LSTM with Memory Between Batches' :lstm_MBB_mape_train,
                           'Stacked LSTM with Memory Between Batches' :lstm_SMBB_mape_train,
                          'ARIMA': arima_mape_train,
                          'CNN': cnn_mape_train,
                          'GRU': gru_mape_train,
                          'XGBoost': xgb_mape_train,
                          'GB': gb_mape_train,
                          'RF': rf_mape_train}
In [71]:
dictsredordered_mape_train = {}
In [72]:
for t in list(dictsordered_mape_train.keys()):
    dictsredordered_mape_train[t] = {k: dictsordered_mape_train[t][k] for k in keysright}
In [73]:
dataframe_mape_train = pandas.DataFrame.from_dict(dictsredordered_mape_train, orient = "index")
In [74]:
dictsordered_mape_test = {"LSTM":lstm_1_mape_test,
                           'LSTM with Window Method': lstm_WM_mape_test,
                           'LSTM with Time Steps': lstm_TS_mape_test,
                           'LSTM with Memory Between Batches' :lstm_MBB_mape_test,
                           'Stacked LSTM with Memory Between Batches' :lstm_SMBB_mape_test,
                          'ARIMA': arima_mape_test,
                          'CNN': cnn_mape_test,
                          'GRU': gru_mape_test,
                          'XGBoost': xgb_mape_test,
                          'GB': gb_mape_test,
                          'RF': rf_mape_test}
In [75]:
dictsredordered_mape_test = {}
In [76]:
for t in list(dictsordered_mape_test.keys()):
    dictsredordered_mape_test[t] = {k: dictsordered_mape_test[t][k] for k in keysright}
In [77]:
dataframe_mape_test = pandas.DataFrame.from_dict(dictsredordered_mape_test, orient = "index")
In [78]:
rmse_4 = pd.concat([dataframe_rmse_train.iloc[:,0:4], dataframe_rmse_test.iloc[:,0:4]], axis=0)
In [79]:
rmses = {}
In [80]:
for t in range(0,16,4):
    dname = "rmses_" + str(3+(t+4)//4)
    rmses[dname] = pd.concat([dataframe_rmse_train.iloc[:,t:t+4], dataframe_rmse_test.iloc[:,t:t+4]], axis=0)
    rmses[dname].to_csv(dname, index = True)
In [81]:
mapes = {}
In [82]:
for t in range(0,16,4):
    dname = "mapes_" + str(3+(t+4)//4)
    mapes[dname] = pd.concat([dataframe_mape_train.iloc[:,t:t+4], dataframe_mape_test.iloc[:,t:t+4]], axis=0)
    mapes[dname].to_csv(dname, index = True)
In [ ]: